Enable additional balancer tests (those starting with `<!DOCTYPE html>`)

author C. Scott Ananian <cscott@cscott.net>

Fri, 15 Jul 2016 23:20:07 +0000 (19:20 -0400)

committer Tim Starling <tstarling@wikimedia.org>

Thu, 21 Jul 2016 03:44:17 +0000 (03:44 +0000)
author C. Scott Ananian <cscott@cscott.net>
Fri, 15 Jul 2016 23:20:07 +0000 (19:20 -0400)
committer Tim Starling <tstarling@wikimedia.org>
Thu, 21 Jul 2016 03:44:17 +0000 (03:44 +0000)
diff --git a/tests/phpunit/includes/tidy/BalancerTest.php b/tests/phpunit/includes/tidy/BalancerTest.php

index f2e41bd..740ddb9 100644 (file)
--- a/tests/phpunit/includes/tidy/BalancerTest.php
+++ b/tests/phpunit/includes/tidy/BalancerTest.php
@@ -48,15 +48,16 @@ class BalancerTest extends MediaWikiTestCase {
                 // for providers, and filter out HTML constructs which
                 // the balancer doesn't support.
                 $tests = [];
-               $start = '<html><head></head><body>';
-               $end = '</body></html>';
+               $okre = "~ \A
+                       (?i:<!DOCTYPE\ html>)?
+                       <html><head></head><body>
+                       .*
+                       </body></html>
+               \z ~xs";
                 foreach ( $json as $filename => $cases ) {
                         foreach ( $cases as $case ) {
                                 $html = $case['document']['html'];
-                               if (
-                                       substr( $html, 0, strlen( $start ) ) !== $start ||
-                                       substr( $html, -strlen( $end ) ) !== $end
-                               ) {
+                               if ( !preg_match( $okre, $html ) ) {
                                         // Skip tests which involve stuff in the <head> or
                                         // weird doctypes.
                                         continue;
@@ -70,6 +71,8 @@ class BalancerTest extends MediaWikiTestCase {
                                 $html = $case['document']['noQuirksBodyHtml'];
                                 // Normalize case of SVG attributes.
                                 $html = str_replace( 'foreignObject', 'foreignobject', $html );
+                               // Normalize case of MathML attributes.
+                               $html = str_replace( 'definitionURL', 'definitionurl', $html );
  
                                 if (
                                         isset( $case['document']['props']['comment'] ) &&
@@ -83,11 +86,17 @@ class BalancerTest extends MediaWikiTestCase {
                                         // Skip tests involving <![CDATA[ ]]> quoting.
                                         continue;
                                 }
-                               if ( stripos( $case['data'], '<!DOCTYPE' ) !== false ) {
-                                       // Skip tests involving doctypes.
+                               if (
+                                       stripos( $case['data'], '<!DOCTYPE' ) !== false &&
+                                       stripos( $case['data'], '<!DOCTYPE html>' ) === false
+                               ) {
+                                       // Skip tests involving unusual doctypes.
                                         continue;
                                 }
-                               if ( preg_match( ',</?(html|head|body|frame|plaintext)>|<rdar:|<isindex,i', $case['data'] ) ) {
+                               $literalre = "~ <rdar: | <isindex | < /? (
+                                       html | head | body | frame | frameset | plaintext
+                               ) > ~xi";
+                               if ( preg_match( $literalre, $case['data'] ) ) {
                                         // Skip tests involving some literal tags, which are
                                         // unsupported but don't show up in the expected output.
                                         continue;
@@ -119,7 +128,8 @@ class BalancerTest extends MediaWikiTestCase {
                                         isset( $case['document']['props']['tagWithLt'] ) ||
                                         isset( $case['document']['props']['attrWithFunnyChar'] ) ||
                                         preg_match( ':^(</b test|<di|<foo bar=qux/>)$:', $case['data'] ) ||
-                                       preg_match( ':</p<p>:', $case['data'] )
+                                       preg_match( ':</p<p>:', $case['data'] ) ||
+                                       preg_match( ':<b &=&amp>|<p/x/y/z>:', $case['data'] )
                                 ) {
                                         // Skip tests with funny tag or attribute names,
                                         // which are really tests of the HTML tokenizer, not
@@ -127,7 +137,7 @@ class BalancerTest extends MediaWikiTestCase {
                                         continue;
                                 }
                                 if (
-                                       stripos( $case['data'], 'encoding=" text/html "' ) !== false
+                                       preg_match( ':encoding=" text/html "|type=" hidden":', $case['data'] )
                                 ) {
                                         // The Sanitizer normalizes whitespace in attribute
                                         // values, which makes this test case invalid.
@@ -137,9 +147,12 @@ class BalancerTest extends MediaWikiTestCase {
                                         // Skip tests with ASCII null, etc.
                                         continue;
                                 }
+                               $data = preg_replace(
+                                       '~<!DOCTYPE html>~i', '', $case['data']
+                               );
                                 $tests[] = [
                                         $filename, # use better description?
-                                       $case['data'],
+                                       $data,
                                         $html
                                 ];
                         }
author	C. Scott Ananian <cscott@cscott.net>
	Fri, 15 Jul 2016 23:20:07 +0000 (19:20 -0400)
committer	Tim Starling <tstarling@wikimedia.org>
	Thu, 21 Jul 2016 03:44:17 +0000 (03:44 +0000)